# setwd("~/dropbox/projects/comments")


library(readstata13)
library(haven)
library(tidyverse)
library(psych)
library(labelled)
library(estimatr)
library(emmeans)
library(stringi)
library(lfe)
library(stargazer)
library(scales)
library(xtable)
library(quanteda)
library(tidytext)
library(tictoc)
library(readstata13)
library(stringr)
library(patchwork)


# The following code has three sections -- 1. Pre-processing; 2. Main Results; 3. Appendix. Please note that we ran Section 1 (pre-processing), commented it out, and then shared the processed data in order to remove potentially sensitive personal information; the processed data are saved as an Rdata file at the end of Section 1. All of the figures and tables can be replicated by running Sections 2 and 3.

# ########################################################################
# ########################################################################
# ########################## 1 Pre-processing ############################
# ########################################################################
# ########################################################################
# 
# anes <-read_dta("anes_timeseries_2016.dta")
# yg<-read_dta("yg_coded.dta")
# load("pew thru w39.RData")
# load("fb master.RData")
# labelr<-read.csv("labelr.coded.csv") # human labelled data (carson montgomery `labelr` package)
# top5k<-read_csv("top5k uncivil.csv")
# 
# yg<-select(yg, # selecting only necessary variables
#            pid7, fb_freq, fb_content_4,
#            senate_term: house_term,
#            libcon, trump_approval,polint, 
#            post_id1: post_id3,
#            show_comment_1:show_comment_3,
#            post_message1:post_message3,
#            comment1:comment3,
#            toxic_feature_1_1, toxic_feature_1_2, toxic_feature_1_3,
#            toxic_feature_2_1, toxic_feature_2_2, toxic_feature_2_3,
#            write1a, write2a, write3a, outlet_id1, outlet_id2, outlet_id3,
#            outlet1,outlet2,outlet3,
#            toxic1, toxic2, toxic3,
#            weight,
#            caseid,
#            gender_o, race_m_1, birthyr, educ, pid3
# )
# 
# 
# fb$post_text<-tolower(fb$post_text)
# fb$wordcount<-sapply(strsplit(fb$comment_text, " "), length)
# fb$kavanaugh.issue<-str_detect(fb$post_text,"kavanaugh|ford|ramirez|swetnick|supreme")
# fb$midterm<-str_detect(fb$post_text,"midterm|election")
# fb<-fb[!is.na(fb$comment_text),]# dropping comments without texts (GIFs, etc)
# 
# 
# 
# 
# 
# ########################################################################
# ########################## RECODING ####################################
# ########################################################################
# 
# 
# ########################## ANES ####################################
# 
# anes$weight<-anes$V160102
# anes$pol.interest<-ifelse(anes$V162256>0,(4-anes$V162256)/3,NA)
# 
# anes$pid[anes$V161155==2&anes$V161156==1]<-1 #identify as Strong Republican
# anes$pid[anes$V161155==2&anes$V161156==2]<-2 #identify as not very strong Republicans
# anes$pid[anes$V161157==1]<-3 #independent and closer to the Republican Party
# anes$pid[anes$V161157==2]<-4 #independent and closer to neither
# anes$pid[anes$V161157==3]<-5 #independent and closer to the Dem Party 
# anes$pid[anes$V161155==1&anes$V161156==2]<-6 #not very strong dems 
# anes$pid[anes$V161155==1&anes$V161156==1]<-7 #identify as strong dems 
# anes$dem<-ifelse(anes$pid>=5,1,
#                  ifelse(anes$pid<=3,0,NA))
# anes$pidstr<-ifelse(anes$pid==1|anes$pid==7,1,0)
# 
# anes$pk1<-ifelse(na_if(anes$V161513,-5)==6,1,0)
# anes$pk2<-ifelse(na_if(anes$V161514,-5)==1,1,0)
# anes$pk3<-ifelse(na_if(anes$V161515,-5)==2,1,0)
# anes$pk4<-ifelse(na_if(anes$V161516,-5)==2,1,0)
# 
# psych::alpha(select(anes,pk1:pk4)) # row reliability alpha = 0.5
# anes$pk<-rowMeans(select(anes,pk1:pk4))
# anes$liberal<-ifelse(anes$V162171<0,NA,
#                      ifelse(anes$V162171==99,0.5,(7-anes$V162171)/6))
# 
# anes$c1<-ifelse(anes$V162018e==1,1, # commenting
#                 ifelse(anes$V162018e==2,0,NA))
# 
# anes$rep_therm<-ifelse(anes$V161096>=0 & anes$V161096<=100, anes$V161096/100,NA)
# anes$dem_therm<-ifelse(anes$V161095>=0 & anes$V161095<=100, anes$V161095/100,NA)
# anes$sci_therm<-ifelse(anes$V162112>=0 & anes$V162112<=100, anes$V162112/100, NA)
# anes$fem_therm<-ifelse(anes$V162096>=0 & anes$V162096<=100, anes$V162096/100,NA)
# anes$clinton_therm<-ifelse(anes$V161086>=0 & anes$V161086<=100, anes$V161086/100,NA)
# anes$trump_therm<-ifelse(anes$V161087>=0 & anes$V161087<=100, anes$V161087/100,NA)
# anes$evan_therm<-ifelse(anes$V162095>=0 & anes$V162095<=100, anes$V162095/100,NA)
# 
# 
# 
# anes$imm1<-ifelse(anes$V162157>0,(anes$V162157-1)/4,NA)
# anes$imm2<-ifelse(anes$V162158>0,(4-anes$V162158)/3,NA)
# anes$imm3<-ifelse(anes$V162268>0,(anes$V162268-1)/4,NA)
# anes$imm4<-ifelse(anes$V162269>0,(5-anes$V162269)/4,NA)
# anes$imm5<-ifelse(anes$V162270>0,(5-anes$V162270)/4,NA)
# 
# psych::alpha(select(anes,imm1:imm5)) # alpha = 0.83
# anes$imm<-rowMeans(select(anes,imm1:imm5))
# 
# 
# anes$nte1<-ifelse(anes$V162248>0,(anes$V162248-1)/4,NA)
# anes$nte2<-ifelse(anes$V162249>0,(anes$V162249-1)/4,NA)
# anes$nte3<-ifelse(anes$V162250>0,(anes$V162250-1)/4,NA)
# anes$nte4<-ifelse(anes$V162251>0,(anes$V162251-1)/4,NA)
# anes$nte5<-ifelse(anes$V162252>0,(anes$V162252-1)/4,NA)
# anes$nte6<-ifelse(anes$V162253>0,(anes$V162253-1)/4,NA)
# 
# psych::alpha(select(anes,nte1,nte3,nte6)) # alpha 0.74
# 
# anes$nte<-rowMeans(select(anes,nte1,nte3,nte6))
# 
# anes$abortion<-ifelse(anes$V161232==5,NA,
#                       ifelse(anes$V161232>0, (4-anes$V161232)/3,NA))
# anes$aca<-ifelse(anes$V161114x>0,(7-anes$V161114x)/6,NA)
# 
# 
# 
# 
# ##########################PEW recoding##############################
# 
# 
# names(pew) <- tolower(names(pew))
# pew<-pew[,-1025]
# # commenting behavior
# pew$comment<-recode(pew$talkpolsnsint_w19, '1'=3, '2'=2, '3'=1, '4'=0)
# pew$comment<-na_if(pew$comment,99)
# pew$comment<-zap_labels(pew$comment)
# pew$as.factor(comment)<-pew$comment/3
# pew$c2<-ifelse(pew$comment>=2,1,0)
# 
# 
# pew$polinterest<-recode(pew$q6_w7, '4'=1, '3'=2, '2'=3, '1'=4, '99'=99)
# 
# 
# pew$pid<-na_if(recode(pew$f_partysum_final, '3'=0, .default=99),99)
# pew$pid[pew$f_partyln_final==2]<-1
# pew$pid[pew$f_party_final==2]<-2 
# pew$pid[pew$f_partyln_final==1]<-3
# pew$pid[pew$f_party_final==1] <-4
# pew$pid[pew$partyln_w1==99 & is.na(pew$pid)==1]<-0
# pew$pid[pew$partyln_w1==2 & is.na(pew$pid)==1]<-1
# pew$pid[pew$party_w1==2 & is.na(pew$pid)==1]<-2
# pew$pid[pew$partyln_w1==1 & is.na(pew$pid)==1]<-3
# pew$pid[pew$party_w1==1 & is.na(pew$pid)==1]<-4
# pew$pid[pew$partyln_w5==99 & is.na(pew$pid)==1]<-0
# pew$pid[pew$partyln_w5==2 & is.na(pew$pid)==1]<-1
# pew$pid[pew$party_w5==2 & is.na(pew$pid)==1]<-2
# pew$pid[pew$partyln_w5==1 & is.na(pew$pid)==1]<-3
# pew$pid[pew$party_w5==1 & is.na(pew$pid)==1]<-4
# pew$pid[pew$partyln_w10==99 & is.na(pew$pid)==1]<-0
# pew$pid[pew$partyln_w10==2 & is.na(pew$pid)==1]<-1
# pew$pid[pew$party_w10==2 & is.na(pew$pid)==1]<-2
# pew$pid[pew$partyln_w10==1 & is.na(pew$pid)==1]<-3
# pew$pid[pew$party_w10==1 & is.na(pew$pid)==1]<-4
# pew$pid[pew$partyln_w16==99 & is.na(pew$pid)==1]<-0
# pew$pid[pew$partyln_w16==2 & is.na(pew$pid)==1]<-1
# pew$pid[pew$party_w16==2 & is.na(pew$pid)==1]<-2
# pew$pid[pew$partyln_w16==1 & is.na(pew$pid)==1]<-3
# pew$pid[pew$party_w16==1 & is.na(pew$pid)==1]<-4
# pew$pid[pew$partyln_w23==99 & is.na(pew$pid)==1]<-0
# pew$pid[pew$partyln_w23==2 & is.na(pew$pid)==1]<-1
# pew$pid[pew$party_w23==2 & is.na(pew$pid)==1]<-2
# pew$pid[pew$partyln_w23==1 & is.na(pew$pid)==1]<-3
# pew$pid[pew$party_w23==1 & is.na(pew$pid)==1]<-4
# pew$pid[pew$partyln_w29==99 & is.na(pew$pid)==1]<-0
# pew$pid[pew$partyln_w29==2 & is.na(pew$pid)==1]<-1
# pew$pid[pew$party_w29==2 & is.na(pew$pid)==1]<-2
# pew$pid[pew$partyln_w29==1 & is.na(pew$pid)==1]<-3
# pew$pid[pew$party_w29==1 & is.na(pew$pid)==1]<-4
# pew$dem<-na_if(recode(pew$pid,'1'=1,'2'=1,'3'=0,'4'=0,'0'=9),9)
# 
# pew$pidstr<-case_when(pew$partystr_w23==1~3,
#                       pew$partystr_w23==2~2,
#                       pew$partyln_w23==1|pew$partyln_w23==2~1,
#                       pew$partyln_w23==99~0,
#                       pew$partystr_w16==1~3,
#                       pew$partystr_w16==2~2,
#                       pew$partyln_w16==1|pew$partyln_w23==2~1,
#                       pew$partyln_w16==3~0,
#                       pew$partystr_w1==1~3,
#                       pew$partystr_w1==2~2,
#                       pew$partyln_w1==1|pew$partyln_w23==2~1,
#                       pew$partyln_w1==99~0                   )/3
# 
# 
# 
# pew$rep_therm = (na_if(pew$thermo_a_w15,70)-1)/68
# pew$dem_therm = (na_if(pew$thermo_b_w15,70)-1)/68
# pew$clinton_therm = (na_if(pew$thermo_thermclinton_w21,999))/100
# pew$trump_therm = (na_if(pew$thermo_thermtrump_w21,999))/100
# 
# pew$c<-pew$comment/3
# 
# 
# pew$imm1<-(4-na_if(pew$immvala_w20,99))/3
# pew$imm2<-(4-na_if(pew$immvalb_w20,99))/3
# pew$imm3<-(4-na_if(pew$immvalc_w20,99))/3
# pew$imm4<-(na_if(pew$immvald_w20,99)-1)/3
# pew$imm5<-(na_if(pew$immvalg_w20,99)-1)/3
# pew$imm6<-(4-na_if(pew$immvalf_w20,99))/3
# 
# psych::alpha(select(pew,imm1:imm6))
# pew$imm<-rowMeans(dplyr::select(pew,imm1:imm6),na.rm=T)
# 
# pew$pol.interest<-(4-na_if(pew$q2a_w9,5))/3
# 
# pew$pk1<-recode(pew$k1_w10,0,0,1,0,.default=0)
# pew$pk2<-recode(pew$k2_w10,0,1,0,0,.default=0)
# pew$pk3<-recode(pew$k3_w10,0,1,0,0,.default=0)
# pew$pk4<-recode(pew$k4_w10,0,0,1,0,.default=0)
# pew$pk5<-recode(pew$k5_w10,1,0,0,0,.default=0)
# pew$pk6<-recode(pew$k6_w10,0,0,1,0,.default=0)
# pew$pk7<-recode(pew$k7_w10,0,0,0,1,.default=0)
# pew$pk8<-recode(pew$k8_w10,0,1,0,0,.default=0)
# pew$pk9<-recode(pew$k9_w10,0,0,1,0,.default=0)
# pew$pk10<-recode(pew$k10_w10,1,0,0,0,.default=0)
# pew$pk11<-recode(pew$k11_w10,0,0,0,1,.default=0)
# pew$pk12<-recode(pew$k12_w10,0,1,0,0,.default=0)
# 
# pew$abortion[pew$abortionallow_w32==2]<-3
# pew$abortion[pew$abortionallow_w32==1]<-2
# pew$abortion[pew$abortionrestr_w32==1]<-1
# pew$abortion[pew$abortionrestr_w32==2]<-0
# pew$abortion<-pew$abortion/3
# 
# pew$liberal<-ifelse(pew$f_ideo<6, (pew$f_ideo-1)/4,NA)
# pew$ideo<-(1-2*pew$liberal)*10
# 
# lm(pew$ideo[pew$q22s2_11_w1==1]~1, weights = pew$weight_w1[pew$q22s2_11_w1==1])
# 
# 
# pew$aca<-ifelse(pew$acaapprv_w24.5<=2, 2-pew$acaapprv_w24.5,NA)
# 
# 
# psych::alpha(select(pew,pk1:pk12)) #Cronbach's alpha = 0.71
# pew$pk<-rowMeans(select(pew,pk1:pk12))
# 
# pew.o<-select(pew, q22s1_1_w1:q22s2_18_w1,dem,weight_w1)
# 
# pew.o$rep<-1-pew$dem
# 
# 
# 
# ##################################### YG recoding #################################
# 
# 
# outlet_bias<-NA
# for(i in 1:36) {  
#   outlet_bias[i]<-weighted.mean(filter(pew.o,pew.o[,i]==1)$rep, 
#                                 filter(pew.o,pew.o[,i]==1)$weight_w1,na.rm=T)
# }
# 
# 
# yg$dem<-ifelse(yg$pid7<=3,1,
#                ifelse(yg$pid7>=5&yg$pid7<=7,0,NA))
# yg$pidstr<-ifelse(yg$pid7>=8,NA,
#                   ifelse(yg$pid7==1|yg$pid7==7,1,0))
# yg$fb<-ifelse(yg$fb_freq==9,0,9-yg$fb_content_4)
# yg$fb3<-ifelse(yg$fb>=6,2,
#                ifelse(yg$fb==0,0,1))
# 
# 
# psych::alpha(data.frame(
#   a = (yg$senate_term==3),
#   b = (yg$president_term_limit==2),
#   c = (yg$state_senators==2),
#   d = (yg$uk_pm==4),
#   e = (yg$house_term==1)
# ))
# 
# 
# yg$pk<- (yg$senate_term==3) + (yg$president_term_limit==2) + (yg$state_senators==2) + (yg$uk_pm==4) + (yg$house_term==1)
# yg$pk<-yg$pk/5
# 
# yg$pol.interest<-ifelse(yg$polint<=5,(5-yg$polint)/4,NA)
# 
# yg$liberal<-ifelse(yg$libcon==8,.5,
#                    ifelse(yg$libcon>=98,NA,
#                           (7-yg$libcon)/6))
# 
# 
# yg$trump<-(4-yg$trump_approval)/3
# yg$trump_therm<-yg$trump # saving as "therm" for consistency with other data
# yg$caseid<-1:nrow(yg)
# 
# yg$female<-yg$gender_o==2
# yg$white<-2-yg$race_m_1
# yg$age<-2018-yg$birthyr
# yg$young<-yg$age<45
# yg$college<-yg$educ>=5
# yg$d<-yg$pid3==1
# yg$r<-yg$pid3==2
# yg$deml<-yg$pid7>=1 & yg$pid7<=3
# yg$repl<-yg$pid7>=5 & yg$pid7<=7
# 
# ## reverse coding (high = pro-rep) ##
# 
# anes$clinton_therm<-1-anes$clinton_therm
# pew$clinton_therm<-1-pew$clinton_therm
# anes$dem_therm<-1-anes$dem_therm
# pew$dem_therm<-1-pew$dem_therm
# anes$conservative<-1-anes$liberal
# pew$conservative<-1-pew$liberal
# yg$conservative<-1-yg$liberal
# anes$aca<-1-anes$aca
# pew$aca<-1-pew$aca
# 
# anes$rep<-1-anes$dem
# pew$rep<-1-pew$dem
# yg$rep<-1-yg$dem
# 
# 
# ##############################  FB RECODING #############################################
# 
# 
# 
# fb<-fb[!is.na(fb$comment_text),]# dropping comments without texts (GIFs, etc)
# 
# n2<-fb %>%
#   group_by(post_id) %>%
#   slice_max(comment_like, n = 2)
# 
# feature<-n2 %>%
#   group_by(post_id) %>%
#   summarise(
#     feature_toxic=mean(toxic),
#     feature_toxic_max=max(toxic)
#   ) # post-level feature characteristics
# 
# fb$feature<-fb$comment_id%in%n2$comment_id #marking feature comments
# fb$feature[fb$comment_like==0]<-0 #removing comments tied at 0
# 
# fb<-left_join(fb,feature,by="post_id")  # merging post-level feature characteristics
# 
# rm(feature) # no longer needed
# rm(n2) # no longer needed
# 
# 
# #outlet bias as share of reps in pew survey
# fb$outlet_bias[fb$outlet==   "abc"	]<-	outlet_bias[1] 
# fb$outlet_bias[fb$outlet==   "breitbart"	]<-	outlet_bias[12]
# fb$outlet_bias[fb$outlet==   "bbc"	]<- outlet_bias[17]
# fb$outlet_bias[fb$outlet==   "bloomberg"	]<-	outlet_bias[33]
# fb$outlet_bias[fb$outlet==   "cbs"	]<-	outlet_bias[2]
# fb$outlet_bias[fb$outlet==   "cnn"	]<-	outlet_bias[21]
# fb$outlet_bias[fb$outlet==   "colbert"	]<-	outlet_bias[23]
# fb$outlet_bias[fb$outlet==   "dailykos"	]<-	outlet_bias[14]
# fb$outlet_bias[fb$outlet==   "dailyshow"	]<- outlet_bias[24]
# fb$outlet_bias[fb$outlet==   "fox"	]<-	outlet_bias[19]
# fb$outlet_bias[fb$outlet==   "glennbeck"	]<-	outlet_bias[25]
# fb$outlet_bias[fb$outlet==   "huffpost"	]<-	outlet_bias[11]
# fb$outlet_bias[fb$outlet==   "motherjones"	]<-	outlet_bias[31]
# fb$outlet_bias[fb$outlet==   "msnbc"	]<-	outlet_bias[20]
# fb$outlet_bias[fb$outlet==   "nbc"	]<-	outlet_bias[3]
# fb$outlet_bias[fb$outlet==   "nytimes"	]<-	outlet_bias[8]
# fb$outlet_bias[fb$outlet==   "npr"	]<-	outlet_bias[4]
# fb$outlet_bias[fb$outlet==   "pbs"	]<-	outlet_bias[22]
# fb$outlet_bias[fb$outlet==   "politico"	]<-	outlet_bias[30]
# fb$outlet_bias[fb$outlet==   "rachelmaddow"	]<-	outlet_bias[26]
# fb$outlet_bias[fb$outlet==   "rushlimbaugh"	]<-	outlet_bias[5]
# fb$outlet_bias[fb$outlet==   "seanhannity"	]<-	outlet_bias[6]
# fb$outlet_bias[fb$outlet==   "slate"	]<-	outlet_bias[29]
# fb$outlet_bias[fb$outlet==   "theblaze"	]<-	outlet_bias[13]
# fb$outlet_bias[fb$outlet==   "economist"	]<-	outlet_bias[36]
# fb$outlet_bias[fb$outlet==   "guardian"	]<-	outlet_bias[16]
# fb$outlet_bias[fb$outlet==   "newyorker"	]<- outlet_bias[18]
# fb$outlet_bias[fb$outlet==   "thinkprogress"	]<-	outlet_bias[15]
# fb$outlet_bias[fb$outlet==   "usatoday"	]<-	outlet_bias[28]
# fb$outlet_bias[fb$outlet==   "wsj"	]<-	outlet_bias[27]
# fb$outlet_bias[fb$outlet==   "washingtonpost"	]<-	outlet_bias[7]
# fb$outlet_bias[fb$outlet==   "yahoo"	]<-	outlet_bias[32]
# fb$outlet_bias[fb$outlet==   "newdrudge"	]<-	outlet_bias[9]
# 
# #short outlet labels to be included in a figure
# fb$short[fb$outlet==   "abc"	]<-	"ABC"
# fb$short[fb$outlet==   "breitbart"	]<-	"breitbart"
# fb$short[fb$outlet==   "bbc"	]<- "BBC"
# fb$short[fb$outlet==   "bloomberg"	]<-	"Bloomberg"
# fb$short[fb$outlet==   "cbs"	]<-	"CBS"
# fb$short[fb$outlet==   "cnn"	]<-	"CNN"
# fb$short[fb$outlet==   "colbert"	]<-	"Colbert"
# fb$short[fb$outlet==   "dailykos"	]<-	"DailyKos"
# fb$short[fb$outlet==   "dailyshow"	]<- "DailyShow"
# fb$short[fb$outlet==   "fox"	]<-	"Fox"
# fb$short[fb$outlet==   "glennbeck"	]<-	"GlennBeck"
# fb$short[fb$outlet==   "huffpost"	]<-	"HuffPost"
# fb$short[fb$outlet==   "motherjones"	]<-	"MotherJones"
# fb$short[fb$outlet==   "msnbc"	]<-	"MSNBC"
# fb$short[fb$outlet==   "nbc"	]<-	"NBC"
# fb$short[fb$outlet==   "nytimes"	]<- "NYT"
# fb$short[fb$outlet==   "npr"	]<-	"NPR"
# fb$short[fb$outlet==   "pbs"	]<-	"PBS"
# fb$short[fb$outlet==   "politico"	]<-	"Politico"
# fb$short[fb$outlet==   "rachelmaddow"	]<-	"Maddow"
# fb$short[fb$outlet==   "rushlimbaugh"	]<-	"Limbaugh"
# fb$short[fb$outlet==   "seanhannity"	]<-	"Hannity"
# fb$short[fb$outlet==   "slate"	]<-	"Slate"
# fb$short[fb$outlet==   "theblaze"	]<-	"Blaze"
# fb$short[fb$outlet==   "economist"	]<-	"Economist"
# fb$short[fb$outlet==   "guardian"	]<-	"Guardian"
# fb$short[fb$outlet==   "newyorker"	]<- "NYorker"
# fb$short[fb$outlet==   "thinkprogress"	]<-	"ThinkProgress"
# fb$short[fb$outlet==   "usatoday"	]<-	"USAToday"
# fb$short[fb$outlet==   "wsj"	]<-	"WSJ"
# fb$short[fb$outlet==   "washingtonpost"	]<-	"WaPo"
# fb$short[fb$outlet==   "yahoo"	]<-	"Yahoo"
# fb$short[fb$outlet==   "newdrudge"	]<-	"NewDrudge"
# 
# 
# fb$outlet3<-ifelse(fb$outlet_bias < .4,1,
#                    ifelse(fb$outlet_bias>=.4&fb$outlet_bias<=.6,0,2))
# fb$outlet3<-as.factor(fb$outlet3)
# fb$partisan<-ifelse(fb$outlet3==0,0,1)
# fb$toxic_percent<-percent_rank(fb$toxic)
# 
# ## Post-level data without feature comments
# post<-subset(fb,feature!=1) %>%
#   group_by(post_id) %>%
#   summarise(
#     toxic=mean(toxic),
#     comment=mean(comments_count_fb),
#     feature_toxic=mean(feature_toxic),
#   ) %>%
#   # excluding the number of "feature" comments from comment count
#   left_join(fb %>% group_by(post_id) %>% summarise(feature_n = sum(feature)), by = "post_id")
# 
# fb$comment_id<-1:nrow(fb) #replacing ids
# 
# 
# ############################## YG COMMENT LEVEL (yg3) ##########################################
# 
# 
# yg3 <- rbind(yg,yg,yg) 
# yg3<-cbind(yg3,reshape(data = data.frame(yg), 
#                        idvar = "caseid", 
#                        varying = list(toxic=c("toxic1","toxic2","toxic3"),comment=c("comment1","comment2","comment3"),
#                                       post_id=c("post_id1","post_id2","post_id3"),
#                                       post_text=c("post_message1","post_message2","post_message3"),
#                                       show_comment=c("show_comment_1","show_comment_2","show_comment_3"),
#                                       toxic_feature_1=c("toxic_feature_1_1","toxic_feature_1_2","toxic_feature_1_3"),
#                                       toxic_feature_2=c("toxic_feature_2_1","toxic_feature_2_2","toxic_feature_2_3"),
#                                       write=c("write1a","write2a","write3a"),
#                                       outlet_id=c("outlet_id1","outlet_id2","outlet_id3"), 
#                                       outlet=c("outlet1","outlet2","outlet3")), 
#                        direction="long", 
#                        v.names = c("toxic","comment",
#                                    "post_id","post_text","show_comment","toxic_feature_1","toxic_feature_2","write","outlet_id","outlet")) 
#            %>% select(toxic,comment,post_id,post_text,show_comment,toxic_feature_1,toxic_feature_2,write,outlet_id,outlet))
# 
# yg3$toxic[yg3$comment=="__NA__"]<-NA # removing *hard* NA's (those who didn't write anything at all)
# #yg3<-subset(yg3,!is.na(toxic))
# 
# yg3$post_text<-tolower(yg3$post_text)
# yg3$wordcount<-sapply(strsplit(yg3$comment, " "), length)
# yg3$show_comment<-2-yg3$show_comment
# yg3$comment_id<-paste0(yg3$caseid,"_",1:nrow(yg3))
# 
# 
# yg3$outlet_bias[yg3$outlet==   "abc"	]<-	outlet_bias[1]
# yg3$outlet_bias[yg3$outlet==   "breitbart"	]<-	outlet_bias[12]
# yg3$outlet_bias[yg3$outlet==   "bbc"	]<- outlet_bias[17]
# yg3$outlet_bias[yg3$outlet==   "bloomberg"	]<-	outlet_bias[33]
# yg3$outlet_bias[yg3$outlet==   "cbs"	]<-	outlet_bias[2]
# yg3$outlet_bias[yg3$outlet==   "cnn"	]<-	outlet_bias[21]
# yg3$outlet_bias[yg3$outlet==   "colbert"	]<-	outlet_bias[23]
# yg3$outlet_bias[yg3$outlet==   "dailykos"	]<-	outlet_bias[14]
# yg3$outlet_bias[yg3$outlet==   "dailyshow"	]<- outlet_bias[24]
# yg3$outlet_bias[yg3$outlet==   "fox"	]<-	outlet_bias[19]
# yg3$outlet_bias[yg3$outlet==   "glennbeck"	]<-	outlet_bias[25]
# yg3$outlet_bias[yg3$outlet==   "huffpost"	]<-	outlet_bias[11]
# yg3$outlet_bias[yg3$outlet==   "motherjones"	]<-	outlet_bias[31]
# yg3$outlet_bias[yg3$outlet==   "msnbc"	]<-	outlet_bias[20]
# yg3$outlet_bias[yg3$outlet==   "nbc"	]<-	outlet_bias[3]
# yg3$outlet_bias[yg3$outlet==   "nytimes"	]<-	outlet_bias[8]
# yg3$outlet_bias[yg3$outlet==   "npr"	]<-	outlet_bias[4]
# yg3$outlet_bias[yg3$outlet==   "pbs"	]<-	outlet_bias[22]
# yg3$outlet_bias[yg3$outlet==   "politico"	]<-	outlet_bias[30]
# yg3$outlet_bias[yg3$outlet==   "rachelmaddow"	]<-	outlet_bias[26]
# yg3$outlet_bias[yg3$outlet==   "rushlimbaugh"	]<-	outlet_bias[5]
# yg3$outlet_bias[yg3$outlet==   "seanhannity"	]<-	outlet_bias[6]
# yg3$outlet_bias[yg3$outlet==   "slate"	]<-	outlet_bias[29]
# yg3$outlet_bias[yg3$outlet==   "theblaze"	]<-	outlet_bias[13]
# yg3$outlet_bias[yg3$outlet==   "economist"	]<-	outlet_bias[36]
# yg3$outlet_bias[yg3$outlet==   "guardian"	]<-	outlet_bias[16]
# yg3$outlet_bias[yg3$outlet==   "newyorker"	]<- outlet_bias[18]
# yg3$outlet_bias[yg3$outlet==   "thinkprogress"	]<-	outlet_bias[15]
# yg3$outlet_bias[yg3$outlet==   "usatoday"	]<-	outlet_bias[28]
# yg3$outlet_bias[yg3$outlet==   "wsj"	]<-	outlet_bias[27]
# yg3$outlet_bias[yg3$outlet==   "washingtonpost"	]<-	outlet_bias[7]
# yg3$outlet_bias[yg3$outlet==   "yahoo"	]<-	outlet_bias[32]
# yg3$outlet_bias[yg3$outlet==   "newdrudge"	]<-	outlet_bias[9]
# yg3$outlet3<-ifelse(yg3$outlet_bias < .4,1,
#                     ifelse(yg3$outlet_bias>=.4&yg3$outlet_bias<=.6,0,2))
# yg3$outlet3<-as.factor(yg3$outlet3)
# 
# 
# yg3$fb<-ifelse(yg3$fb_freq==9,0,9-yg3$fb_content_4)
# yg3$fb3<-ifelse(yg3$fb>=6,2,
#                 ifelse(yg3$fb==0,0,1))
# 
# yg3$fb<-factor(ifelse(yg3$fb>=6,"Frequent FB Commentor",
#                       ifelse(yg3$fb==0,"No FB Comments",NA)))
# yg3$fb<-factor(yg3$fb,levels=c("No FB Comments","Frequent FB Commentor"))
# yg3$write<-(4-yg3$write)/3 # would write comment
# yg3$fbw<-ifelse(yg3$write==1,1,ifelse(yg3$write==0,0,NA))
# 
# 
# yg3$kavanaugh.issue<-str_detect(yg3$post_text,"kavanaugh|ford|ramirez|swetnick|supreme")
# yg3$midterm<-str_detect(yg3$post_text,"midterm|election")
# yg3$comment_text<-yg3$comment
# 
# 
# ######################## FB+YG ################################################
# 
# #appending fb, yg into single data
# fbyg<-rbind(fb
#             %>%subset(sample==1)
#             %>%select(toxic,wordcount,kavanaugh.issue,midterm,outlet3, comment_id),
#             yg3
#             %>%select(toxic,wordcount,kavanaugh.issue,midterm,outlet3, comment_id))
# 
# fbyg$data<-c(rep("Facebook",nrow(fb%>%subset(sample==1))),rep("YouGov",nrow(yg3)))
# fbyg$data<-factor(fbyg$data,levels=c("YouGov","Facebook"))
# fbyg$caseid[fbyg$data=="YouGov"]<-as.character(yg3$caseid)
# fbyg$caseid[fbyg$data=="Facebook"]<-subset(fb, sample==1)$comment_id
# fbyg$weight[fbyg$data=="YouGov"]<-yg3$weight
# fbyg$weight[fbyg$data=="Facebook"]<-1
# fbyg$post_id[fbyg$data=="YouGov"]<-yg3$post_id
# fbyg$post_id[fbyg$data=="Facebook"]<-subset(fb, sample==1)$post_id
# fbyg$fb3<-c(rep(2,fb%>%subset(sample==1)%>%nrow()),
#             yg3$fb3)
# 
# 
# ######################## Validation Data ################################################
# 
# # labelR validation
# val.data<-rbind(fb%>%select(comment_text,comment_id,toxic),yg3%>%select(comment_text,comment_id,toxic)) # validation data with fb,yg comments
# labelr<-left_join(labelr,val.data%>%select(comment_id,toxic)) # merging
# 
# 
# # dictionary validation
# 
# # creating fb comment dfm
# fb_dfm<-dfm(val.data$comment_text, tolower = T, remove = c(stopwords("en"), stop_words$word), remove_symbols = T, remove_punct = T , remove_numbers = T, remove_url = T, stem = TRUE)%>%dfm_trim(min_termfreq=800 )
# 
# # labelled top 5k words where  narrow = 295 uncivil words
# top5k$narrow[is.na(top5k$narrow)]<-0
# 
# # creating dictionary
# dic<-top5k%>%filter(narrow==1)%>%pull(word)
# uncivil<-list(
#   narrow = dic # narrow = uncivil (narrowly defined)
# )%>%dictionary()
# 
# # applying dictionary to fb dfm
# fb_dfm<-dfm_lookup(fb_dfm,dictionary = uncivil)%>%
#   convert(to = "data.frame")
# 
# # attaching dictionary variable (narrow) to validation data to be plotted
# val.data$narrow<-fb_dfm$narrow
# rm(fb_dfm) # removing dfm from workspace for RAM
# val.data$narrow[val.data$narrow>1]<-1
# val.data$data<-rep("fb",nrow(fb))%>%c(rep("yg",nrow(yg3)))
# 
# 
# ####################### ####################### ####################### ####################### 
# ####################### Removing Potentially sensitive information ####################
# ####################### ####################### ####################### ####################### 
# 
# fb$comment_text<-NA
# fb$comment_by<-NA
# yg$comment1<-NA
# yg$comment2<-NA
# yg$comment3<-NA
# yg3$comment<-NA
# yg3$comment_text<-NA
# yg3$comment1<-NA
# yg3$comment2<-NA
# yg3$comment3<-NA
# 
# save.image("JOC Replication.RData")


########################################################################
########################################################################
########################## 2 Main Results ##############################
########################################################################
########################################################################

# Restore the processed workspace that Section 1 wrote with save.image().
load(file = "JOC Replication.RData")


############################ Data (pp.8-9) #############################################

# Facebook data: number of distinct posts, then number of comments.
n_distinct(fb$post_id) # n of post
nrow(fb) # n of comments

# YouGov sample composition: an intercept-only weighted regression returns the
# survey-weighted mean of each indicator.
lm(formula = white ~ 1, data = yg, weights = weight)
lm(formula = female ~ 1, data = yg, weights = weight)
lm(formula = young ~ 1, data = yg, weights = weight)
lm(formula = college ~ 1, data = yg, weights = weight)
lm(formula = d ~ 1, data = yg, weights = weight)
lm(formula = r ~ 1, data = yg, weights = weight)
lm(formula = deml ~ 1, data = yg, weights = weight) # dem + leaner
lm(formula = repl ~ 1, data = yg, weights = weight) # rep + leaner

############################ FIGURE 1 #############################################

# Pew models are fit with unit weights: set pew$weight to 1 so that the
# `weights = weight` argument below treats every Pew respondent equally.
pew$weight <- 1

# Variables used in the commenting-correlates figure (saved below as fig2.pdf).
# Each data frame lists the outcome columns first, followed by that survey's
# commenting measure and the weight:
#   [[1]] ANES   - 4 outcomes, commenting = c1
#   [[2]] Pew    - 3 outcomes, commenting = comment
#   [[3]] YouGov - 3 outcomes, commenting = fb3
df.f2 <- list(
  data.frame(select(anes, pol.interest, pk, pidstr, nte, c1, weight)),
  data.frame(select(pew, pol.interest, pk, pidstr, comment, weight)),
  data.frame(select(yg, pol.interest, pk, pidstr, fb3, weight))
)

# empty list that will hold the 10 fitted models
f2.models <- list()

# Fit one weighted regression per outcome. Each model is fit exactly once; the
# previous version wrapped these loops in an outer `for (i in 1:10)` that reused
# the same index and merely repeated all ten fits ten times.

# models 1-4: ANES outcomes (columns 1-4) on the binary commenting indicator
for (i in 1:4) {
  f2.models[[i]] <- lm(df.f2[[1]][, i] ~ c1, df.f2[[1]], weights = weight)
}

# models 5-7: Pew outcomes (columns 1-3) on commenting frequency as a factor
for (i in 5:7) {
  f2.models[[i]] <- lm(df.f2[[2]][, i - 4] ~ as.factor(comment), df.f2[[2]], weights = weight)
}

# models 8-10: YouGov outcomes (columns 1-3) on Facebook commenting as a factor
for (i in 8:10) {
  f2.models[[i]] <- lm(df.f2[[3]][, i - 7] ~ as.factor(fb3), df.f2[[3]], weights = weight)
}

# Collect one coefficient row per model from f2.models into `est`, for the
# commenting-correlates figure (saved below as fig2.pdf).
# Columns mirror the first five columns returned by tidy() for an lm fit.
est <- data.frame(
  term = rep(NA, 10),
  estimate = rep(NA, 10),
  std.error = rep(NA, 10),
  statistic = rep(NA, 10),
  p.value = rep(NA, 10)
)

# Each row is extracted exactly once; the previous version wrapped these loops
# in an outer `for (i in 1:10)` that reused the same index and only repeated
# the identical work ten times.

# ANES (models 1-4): row 2 is the coefficient on c1
for (i in 1:4) {
  est[i, ] <- tidy(f2.models[[i]])[2, ]
}

# Pew (models 5-7): row 4 is the last level of as.factor(comment)
# (assumes comment takes four levels, as coded in Section 1)
for (i in 5:7) {
  est[i, ] <- tidy(f2.models[[i]])[4, ]
}

# YouGov (models 8-10): row 3 is the last level of as.factor(fb3)
# (assumes fb3 takes three levels, as coded in Section 1)
for (i in 8:10) {
  est[i, ] <- tidy(f2.models[[i]])[3, ]
}

# Prepare `est` for plotting.

# 95% confidence bounds from the normal approximation (estimate +/- 1.96 SE)
est[["conf.low"]] <- est[["estimate"]] - 1.96 * est[["std.error"]]
est[["conf.high"]] <- est[["estimate"]] + 1.96 * est[["std.error"]]

# position of each outcome within its panel (4 ANES, 3 Pew, 3 YouGov rows)
est[["x"]] <- c(seq_len(4), seq_len(3), seq_len(3))

# panel label for each row
est[["data"]] <- rep(
  c("ANES 2016", "Pew 2015-2018", "YouGov 2018"),
  times = c(4, 3, 3)
)

# axis labels for outcome positions 1-4
dvlist <- c(
  "Political interest",
  "Political knowledge",
  "Strong partisan",
  "Need to evaluate"
)

# Shared figure theme: no grid lines, black panel border and axis text,
# white facet strips, small legend keys, and no y-axis title.
th <- theme(
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(colour = "black"),
  legend.position = c(0.9, 0.9),
  legend.text = element_text(),
  legend.key.size = unit(2, "mm"),
  strip.background = element_rect(fill = "white", color = "white", size = 1),
  axis.title.y = element_blank(),
  axis.text.x = element_text(color = "black"),
  axis.text.y = element_text(color = "black")
)


# Coefficient plot: one panel per survey, point estimates with 95% intervals.
# x is negated so that, after coord_flip(), outcome 1 appears at the top.
ggplot(data = est, mapping = aes(x = -x, y = estimate)) +
  facet_grid(. ~ data, scales = "free") +
  geom_point(position = position_dodge(0.2), size = 1.8, alpha = .7) +
  geom_linerange(
    aes(ymin = conf.low, ymax = conf.high),
    position = position_dodge(0.2),
    size = 0.2,
    alpha = .7
  ) +
  ylim(c(-0.005, .3)) +
  scale_x_continuous(breaks = -(1:4), labels = dvlist) +
  labs(y = "", x = "") +
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.2) +
  theme_bw() +
  coord_flip() +
  th

# ggsave() is given no plot argument, so it writes the most recent plot.
ggsave("fig2.pdf", width = 6.5, height = 2.5)


############################ FIGURE 2 #############################################


# Reverse-coded commenting measures (each is the scale maximum minus the
# original value), used below to re-reference the interaction models.
anes[["cr"]] <- 1 - anes[["c1"]]
pew[["cr"]] <- 3 - pew[["comment"]]
yg[["cr"]] <- 3 - yg[["fb3"]]

# Variables per survey: outcome columns first, then the commenting measure,
# its reversed version, the party indicator `rep`, and the weight.
df.f3 <- list(
  data.frame(select(
    anes,
    trump_therm, clinton_therm, rep_therm, dem_therm,
    conservative, abortion, aca, imm,
    c1, cr, rep, weight
  )),
  data.frame(select(
    pew,
    trump_therm, clinton_therm, rep_therm, dem_therm,
    conservative, abortion, aca, imm,
    comment, cr, rep, weight
  )),
  data.frame(select(yg, trump, conservative, fb3, cr, rep, weight))
)

# containers for the two sets of fitted models
f3.models <- list()
f3.models2 <- list()


# 18 models stored in a list (rep: partisan gap among non-commenters).
# Each outcome column is regressed on rep interacted with the commenting
# variable, using survey weights: models 1-8 use the ANES frame, 9-16 the Pew
# frame and 17-18 the YouGov frame.
# The loops were previously nested inside an outer `for (i in 1:18)` that
# reused the same index, which refitted every model 18 times with identical
# results; each model is now fitted once.
for(i in 1:8){
  f3.models[[i]]<-lm(df.f3[[1]][,i] ~ rep*c1, df.f3[[1]], weights = weight)
}
for(i in 9:16){
  f3.models[[i]]<-lm(df.f3[[2]][,i-8] ~ rep*as.factor(comment), df.f3[[2]], weights = weight)
}
for(i in 17:18){
  f3.models[[i]]<-lm(df.f3[[3]][,i-16] ~ rep*as.factor(fb3), df.f3[[3]], weights = weight)
}

# 18 models stored in a list (rep: partisan gap among commenters).
# Same specifications with the reverse-coded commenting variable `cr`, so the
# coefficient on rep refers to the other end of the commenting scale.
for(i in 1:8){
  f3.models2[[i]]<-lm(df.f3[[1]][,i] ~ rep*cr, df.f3[[1]], weights = weight)
}
for(i in 9:16){
  f3.models2[[i]]<-lm(df.f3[[2]][,i-8] ~ rep*as.factor(cr), df.f3[[2]], weights = weight)
}
for(i in 17:18){
  f3.models2[[i]]<-lm(df.f3[[3]][,i-16] ~ rep*as.factor(cr), df.f3[[3]], weights = weight)
}

# Extracting the estimates shown in the partisan-gap figure and storing them
# in `est`: rows 1-18 come from f3.models, rows 19-36 from f3.models2.
est<-data.frame(
  term = rep(NA,36),
  estimate = rep(NA,36),
  std.error = rep(NA,36),
  statistic = rep(NA,36),
  p.value = rep(NA,36)
)

# Row 2 of each tidy() table is the first term after the intercept, i.e. the
# coefficient on `rep`. A single loop replaces the previous outer
# `for (i in 1:36)` that reused the inner loops' index and repeated the whole
# extraction 36 times.
for(i in 1:18){
  est[i,]<-tidy(f3.models[[i]])[2,]
  est[i+18,]<-tidy(f3.models2[[i]])[2,]
}

# 95% normal-approximation interval bounds for each estimate.
est$conf.low<-est$estimate-1.96*est$std.error
est$conf.high<-est$estimate+1.96*est$std.error
# Axis position of each estimate (8 ANES, 8 Pew, 2 YouGov outcomes), repeated
# for the two sets of models; positions index the 10 labels in `dvlist`.
est$x<-rep(c(1:4,7:10,1:2,5:10,1,7),2)
est$data<-rep(c(rep("ANES 2016",8),rep("Pew 2015-2018",8),rep("YouGov 2018",2)),2)
# 0 = rows taken from f3.models, 1 = rows taken from f3.models2.
est$comment<-c(rep(0,18),rep(1,18))

# Axis labels for positions 1-10.
dvlist<-c("Like/support Trump","Dislike Clinton",
          "Like Rep Party","Dislike Dem Party",
          "Like Republicans","Dislike Democrats",
          "Conservative ideology","Pro-life",
          "Oppose ACA", "Restrict immigration")



# Partisan-gap plot: estimates and intervals from `est`, faceted by survey,
# with shape and colour distinguishing the two model sets (`comment` 1 vs 0,
# labelled "Commenters" / "Non-Commenters"). `x` is negated and the axes are
# flipped, so breaks -1..-10 carry the labels in `dvlist`.
# Written to fig3-partisan-gap.pdf (6.5 x 6.5 in).
ggplot(data=est, aes(x=-x, y=estimate,shape=as.factor(comment),color=as.factor(comment)))+
  facet_grid(. ~ data,scales = "free")+
  scale_x_continuous(breaks=-1:-10,labels=dvlist)+
  geom_point(aes(),position=position_dodge(0.3),size=1.8,alpha=.7)+
  geom_linerange(aes(ymin = conf.low, ymax = conf.high),position=position_dodge(0.3), size=0.2,alpha=.7)+
  scale_shape_manual(name = c(""),
                     breaks=c(1,0),
                     labels = c("Commenters", "Non-Commenters"),
                     values=c(17, 16))+
  scale_color_manual(name = c(""),
                     breaks=c(1,0),
                     labels = c("Commenters", "Non-Commenters"),
                     values=c("red3","grey2"))+
  geom_hline(yintercept = 0, linetype="dashed",size=0.2)+theme_bw()+coord_flip()+
  labs(y="Estimated partisan difference (Republicans - Democrats)",x="")+
  theme_bw()+ylim(0,.95)+
  theme(panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        panel.border=element_rect(colour="black"),
        legend.position = c(0.85, 0.84),
        #legend.text=element_text(size=6),
        legend.key.size = unit(2, 'mm'),
        strip.background = element_rect(fill ="white",color = "white",size=1),
        plot.title = element_text(hjust=0.5),
        axis.text.x = element_text(color = "black"),
        axis.text.y = element_text(color = "black"),
        axis.title = element_text(size=11))
ggsave("fig3-partisan-gap.pdf",width=6.5,height=6.5)



########################## FIG 3 ####################################

# Density of `toxic` in yg3 by `fb` (rows with missing `fb` dropped), using a
# fixed bandwidth of 0.05. Three text annotations show example comments at
# toxicity 0.5, 0.75 and 0.99; the curve and text annotations label the two
# groups directly, so the legend is switched off.
# The plot is stored in `fig4`, printed, and written to
# fig4-1-yg-by-selfreported-commenting.pdf (5 x 3 in).
fig4<-ggplot(yg3[!is.na(yg3$fb),],aes(x=toxic,color=fb,fill=fb,linetype=fb))+geom_density(bw=.05,alpha=0.2,size=.2)+
  annotate(geom="text",x=.5,y=2.7,label=c("Toxicity = 0.5
                                        \nGOOD LEAVE FOR AFRICA
                                        \nShould of been sterilized"),size=2)+
  annotate(geom="text",x=.72,y=2.7,label=c("Toxicity = 0.75
                                        \nCriminal!!!!
                                        \nDisgusting..."),size=2)+
  annotate(geom="text",x=.92,y=2.7,label=c("Toxicity = 0.99
                                        \nShe's a stupid, worthless b*  
                                        \nRacist ass motherf*****"),size=2)+
  scale_color_manual(name = c(""), values=c("grey2","red3"))+
  scale_fill_manual(name = c(""),values=c("grey2","red3"))+
  scale_linetype_manual(name = c(""),values=c("solid","dashed"))+
  labs(x="Toxicity",y="")+ theme_bw()+
  annotate("curve", x = c(.13,.5), y = c(4.3,.8), 
           xend = c(.10,.5), yend = c(4.3,.5), color=c("grey2","red3"), linetype=c("solid","dashed"),
           arrow = arrow(angle = 30, length = unit(1, "mm")),size=0.2,alpha=0.7, curvature = .1)+
  annotate(geom="text",x = c(.14,.5), y = c(4.3,.9), 
           label=c("''Never comment''\n(self-report in YouGov survey)",
                   "''Frequently comment''\n(self-report in YouGov survey)"),
           color=c("grey2","red3"),vjust=c(.5,0),hjust=c(0,.5),size=3)+
  #annotate(geom="text",x=0.5,y=4.8,label="Mean Difference = 0.050 (p < 0.001)",size=2.5)+
  xlim(0,1)+ylim(0,5)+theme(panel.grid.major=element_blank(),
                            panel.grid.minor=element_blank(),
                            axis.text = element_text(color = "black"),
                            axis.title = element_text(color = "black"),
                            legend.position="none")
fig4
ggsave("fig4-1-yg-by-selfreported-commenting.pdf",width=5,height=3)


# Weighted regression of toxicity on the self-reported commenting categories
# (no fixed effects, no instruments), standard errors clustered on caseid.
felm(toxic~as.factor(fb3)|0|0|caseid,yg3,weights=yg3$weight)%>%summary()

########################## FIG 4  ####################################





# Density of `toxic` in fbyg by source (`data`), fixed bandwidth 0.05, with
# the same three example-comment annotations and direct labels for the two
# sources instead of a legend. Written to fig5-fb-vs-yg.pdf (5 x 3 in).
ggplot(fbyg,aes(x=toxic,color=data,fill=data,linetype=data))+geom_density(bw=.05,alpha=0.2,size=.2)+
  annotate(geom="text",x=.5,y=2.7,label=c("Toxicity = 0.5
                                        \nGOOD LEAVE FOR AFRICA
                                        \nShould of been sterilized"),size=2)+
  annotate(geom="text",x=.72,y=2.7,label=c("Toxicity = 0.75
                                        \nCriminal!!!!
                                        \nDisgusting..."),size=2)+
  annotate(geom="text",x=.92,y=2.7,label=c("Toxicity = 0.99
                                        \nShe's a stupid, worthless b*  
                                        \nRacist ass motherf*****"),size=2)+
  scale_color_manual(name = c(""), values=c("grey2","red3"))+
  scale_fill_manual(name = c(""),values=c("grey2","red3"))+
  scale_linetype_manual(name = c(""),values=c("solid","dashed"))+
  labs(x="Toxicity",y="")+ theme_bw()+
  annotate("curve", x = c(.14,.5), y = c(4.7,1.1), 
           xend = c(.09,.5), yend = c(4.4,.8), color=c("grey2","red3"), linetype=c("solid","dashed"),
           arrow = arrow(angle = 30, length = unit(1, "mm")),size=0.2,alpha=0.7, curvature = .1)+
  annotate(geom="text",x = c(.21,.5), y = c(4.9,1.3), 
           label=c("YouGov: Comments in survey",
                   "Facebook: Comments on platform"),
           color=c("black","red3"),vjust=c(.5,0), size = 3)+
  xlim(0,1)+ylim(0,5)+theme(panel.grid.major=element_blank(),
                            panel.grid.minor=element_blank(),
                            axis.text = element_text(color = "black"),
                            axis.title = element_text(color = "black"),
                            legend.position="none")
ggsave("fig5-fb-vs-yg.pdf",width=5,height=3)


# Number of comments with a non-missing toxicity score, by source.
table(fbyg$data[!is.na(fbyg$toxic)])
# YouGov Facebook 
# 6567  1188454 
fbyg%>%group_by(post_id)%>%summarise()%>%nrow() # n of posts
# Weighted regression of toxicity on source, standard errors clustered on
# caseid.
felm(toxic~data|0|0|caseid,fbyg,weights = fbyg$weight)%>%summary()

################## Figure 5: Toxicity & Likes #################################### 



# Smoothed relationship between toxicity percentile and comment likes in `fb`,
# one curve per `outlet3` category (geom_smooth with its default method).
# coord_cartesian() zooms the y-axis to 0-4 without dropping data from the
# fit. The curves are labelled with text annotations instead of a legend.
# Written to fig7.pdf (5 x 3 in).
ggplot(fb,aes(x=toxic_percent,y=comment_like,color=as.factor(outlet3)))+
  geom_smooth(aes(fill=as.factor(outlet3)),alpha=0.2,size=0.2)+
  coord_cartesian(ylim = c(0, 4))+
  scale_color_manual(name = c(""),
                     values=c("grey4","navy","red4"))+
  scale_fill_manual(name = c(""),
                    values=c("grey4","navy","red4"))+
  annotate("text",x=0.69,y=4,label="Liberal",size=3,color="navy")+
  annotate("text",x=0.69,y=3,label="Conservative",size=3,color="red4")+
  annotate("text",x=0.69,y=2.3,label="Neutral",size=3,color="black")+
  labs(x="Toxicity (percentile)",y="Likes")+theme_bw()+
  theme(panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        axis.text = element_text(color = "black", size=6),
        axis.title = element_text(color = "black", size=8),
        legend.position="none")
ggsave("fig7.pdf",width=5,height=3)



############################ FIGURE 6 #############################################


# Post-level loess fit (span 0.375) of `comment - feature_n` on
# `feature_toxic`, with a rug of the x values along the bottom and the y-axis
# zoomed to 0-2000. Written to fig9-1-no-bins.pdf (2.5 x 2.5 in).
# NOTE(review): `color="grey4"` is passed to ggplot() itself rather than to a
# layer; the line colour is set explicitly in geom_smooth() -- confirm the
# outer argument is not needed.
ggplot(post,aes(x=feature_toxic,y=comment-feature_n),color="grey4")+
  geom_smooth(method = "loess",span=0.375,alpha=0.3,size=0.2,color="grey4")+
  labs(x="Average Toxicity of Top 2 Comments",y="Number of Total Comments")+ theme_bw()+
  geom_rug(alpha=0.04,sides="b")+
  theme(panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        axis.text = element_text(color = "black", size=6),
        axis.title = element_text(color = "black", size=8),
        legend.position="none")+ coord_cartesian(ylim = c(0, 2000))
ggsave("fig9-1-no-bins.pdf",width=2.5,height=2.5)

# Same layout with post-level `toxic` on the y-axis, zoomed to 0.08-0.46.
# Written to fig9-2-no-bins.pdf (2.5 x 2.5 in).
ggplot(post,aes(x=feature_toxic,y=toxic))+
  geom_smooth(method = "loess",span=0.375,alpha=0.3,size=0.2,color="grey4")+
  labs(x="Average toxicity of top 2 comments",y="Average toxicity of other comments")+ theme_bw()+
  geom_rug(alpha=0.04,sides="b")+
  theme(panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        axis.text = element_text(color = "black", size=6),
        axis.title = element_text(color = "black", size=8),
        legend.position="none")+
  coord_cartesian(ylim = c(0.08, 0.46))
ggsave("fig9-2-no-bins.pdf",width=2.5,height=2.5)

############################ FIGURE 7 #############################################

# meantox: row-wise mean of the two featured-comment toxicity scores.
# cond: `show_comment` as a factor (labelled "hidden" / "shown" in the plot).
yg3$meantox<-rowMeans(select(yg3,toxic_feature_1,toxic_feature_2))
yg3$cond<-as.factor(yg3$show_comment)


# Loess fits of respondent-comment toxicity on `meantox`, one per condition,
# with direct labels for the two curves, a rug of x values and the y-axis
# zoomed to 0.05-0.45.
# Written to fig10-2-no-bins-yg-contagio-downstream-toxic.pdf (2.5 x 2.5 in).
ggplot(yg3,aes(x=meantox,y=toxic,color=cond))+
  stat_smooth(method="loess",aes(color=cond,fill=cond,linetype=cond),size=0.3,alpha=0.2)+
  scale_color_manual(name = c(""), values=c("grey2","red3"))+
  scale_fill_manual(name = c(""),values=c("grey2","red3"))+
  scale_linetype_manual(name = c(""),values=c("solid","dashed"))+
  labs(x="Average toxicity of featured comments",
       y= "Toxicity of respondent comments")+
  annotate("curve", x = c(.78), y = c(.14,.36), 
           xend = c(.78), yend = c(.17,.33), color=c("grey2","red3"), linetype=c("solid","dashed"),
           arrow = arrow(angle = 30, length = unit(1, "mm")),size=0.2,alpha=0.7, curvature = 0)+
  annotate(geom="text",x = c(.78), y = c(.13,.37),
           label=c("Featured\ncomments\nhidden","Featured\ncomments\nshown"),
           size=c(2),color=c("grey2","red3"),vjust=c(1,0))+
  theme_bw()+
  geom_rug(alpha=0.03,sides="b")+
  coord_cartesian(ylim = c(0.05, 0.45))+
  theme(panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        axis.text = element_text(color = "black", size=6),
        axis.title = element_text(color = "black", size=8),
        legend.position="none")
ggsave("fig10-2-no-bins-yg-contagio-downstream-toxic.pdf",width=2.5,height=2.5)


# Observed range of the featured-comment toxicity measure.
range(yg3$meantox,na.rm=T) # [1] 0.0434826 0.8984462

# Range (min, max) of the loess fitted values within each condition.
# NOTE(review): element [2] is the largest fitted value, which is not
# necessarily the fitted value at the largest meantox -- confirm this is the
# intended quantity for the "worst comment" effect below.
est0<-range(predict(loess(toxic~meantox,subset(yg3,cond==0)))) #[1] 0.1332569 0.2113133
est1<-range(predict(loess(toxic~meantox,subset(yg3,cond==1)))) #[1] 0.1262327 0.3320623

est1[2]-est0[2] # marginal effect of worst comment = 0.12
(est1[2]-est0[2])/sd(yg3$toxic, na.rm = T) # marginal effect of worst comment = 0.55 SD


# Loess fits of `write` (converted to numeric) on `meantox`, one per
# condition, with direct labels for the two curves, a rug of x values and the
# y-axis zoomed to 0.05-0.45.
# Written to fig10-1-no-bins-yg-contagion-willing-to-comment.pdf
# (2.5 x 2.5 in).
ggplot(as.data.frame(yg3),aes(x=meantox,y=as.numeric(write),color=cond))+
  stat_smooth(method="loess",aes(color=cond,fill=cond,linetype=cond),size=0.3,alpha=0.2)+
  scale_color_manual(name = c(""), values=c("grey2","red3"))+
  scale_fill_manual(name = c(""),values=c("grey2","red3"))+
  scale_linetype_manual(name = c(""),values=c("solid","dashed"))+
  labs(x="Average toxicity of featured comments",
       y= "Willingness to comment")+
  annotate("curve", x = c(.5), y = c(.20,.32), 
           xend = c(.5), yend = c(.22,.30), color=c("grey2","red3"), linetype=c("solid","dashed"),
           arrow = arrow(angle = 30, length = unit(1, "mm")),size=0.2,alpha=0.7, curvature = 0)+
  annotate(geom="text",x = c(.5), y = c(.195,.325),
           label=c("Featured\ncomments\nhidden","Featured\ncomments\nshown"),
           size=c(2),color=c("grey2","red3"),vjust=c(1,0))+
  geom_rug(alpha=0.03,sides="b")+
  coord_cartesian(ylim = c(0.05, 0.45))+
  theme_bw()+
  theme(panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        axis.text = element_text(color = "black", size=6),
        axis.title = element_text(color = "black", size=8),
        legend.position="none")
ggsave("fig10-1-no-bins-yg-contagion-willing-to-comment.pdf",width=2.5,height=2.5)


##




# Regressions behind the contagion table. felm formulas read
# outcome ~ covariates | fixed effects | instruments | cluster variable;
# every model clusters standard errors on caseid, and models 4-5 of each set
# absorb post_id fixed effects.

# DV = willingness to comment
w1 <- felm(write ~ cond|0 |0| caseid, yg3)
w2 <- felm(write ~ meantox|0 |0| caseid, yg3)
w3 <- felm(write ~ cond*meantox|0 |0| caseid, yg3)
w4 <- felm(write ~ cond|post_id |0| caseid, yg3)
w5 <- felm(write ~ cond*meantox|post_id |0| caseid, yg3)

# DV = toxicity of the respondent's comment
t1 <- felm(toxic ~ cond|0 |0| caseid, yg3)
t2 <- felm(toxic ~ meantox|0 |0| caseid, yg3)
t3 <- felm(toxic ~ cond*meantox|0 |0| caseid, yg3)
t4 <- felm(toxic ~ cond|post_id |0| caseid, yg3)
t5 <- felm(toxic ~ cond*meantox|post_id |0| caseid, yg3)



# LaTeX table of the ten models. The interaction label needs a doubled
# backslash: in an R string "\t" is a tab, so "$\times$" was emitted as
# "$<TAB>imes$" instead of the LaTeX \times symbol.
stargazer(w1,w2,w3,w4,w5,t1,t2,t3,t4,t5,
          keep = c("cond1","meantox","cond1:meantox","Constant"),
          keep.stat=c("n","adj.rsq"),
          model.numbers = FALSE,
          dep.var.labels = c("DV = Willingness to comment","DV = Toxicity of YG respondent comments"),
          covariate.labels = c("Treatment (top comments shown)",
                               "Average toxicity of top comments",
                               "Treatment $\\times$ top comments toxicity",
                               "Constant"),
          add.lines = list(c("Article fixed effects","No","No","No","Yes","Yes","No","No","No","Yes","Yes")),
          star.cutoffs = c(0.05,0.01,0.005),
          notes = "Standard errors are clustered at the respondent level.",
          column.labels = c("(1)","(2)","(3)","(4)","(5)","(6)","(7)","(8)","(9)","(10)"),
          style = "apsr")

########################################################################
########################################################################
########################## 3 Appendix ##################################
########################################################################
########################################################################


################# Appendix:A  #######################################

################# Table A1  #######################################


# Outlet-level summary of the comment data: mean outlet_bias, mean numeric
# outlet3 code, mean toxicity and number of comments per outlet.
outlet <- summarise(
  group_by(fb, outlet),
  outlet_bias = mean(outlet_bias),
  outlet3 = mean(as.numeric(outlet3)),
  toxic = mean(toxic),
  n = n()
)

# Replace the numeric outlet3 code with its text label (other values -> NA).
outlet <- mutate(
  outlet,
  outlet3 = case_when(
    outlet3 == 1 ~ "neutral",
    outlet3 == 2 ~ "liberal",
    outlet3 == 3 ~ "conservative"
  )
)

# Lower-case every string in `x`, then upper-case its first character.
# Vectorised over `x`; returns a character vector of the same length.
firstup <- function(x) {
  lowered <- tolower(x)
  initial <- toupper(substr(lowered, 1, 1))
  substr(lowered, 1, 1) <- initial
  lowered
}


# Display names for the outlet table. Names are first capitalised with
# firstup(); rows listed below are then overwritten by position. Rows 4, 23
# and 27 keep the capitalised raw name.
# The overrides depend on the row order of `outlet` (32 rows, in the order
# produced by the grouping above), so fail early if the table is shorter than
# expected rather than silently mislabelling rows.
# Label fixes relative to the previous version: "Mother Jones Post" ->
# "Mother Jones", "The Wallstreet Journal" -> "The Wall Street Journal".
# NOTE(review): "New Drudge" (row 18) is kept as is -- confirm the intended
# name.
outlet$outlet<-firstup(outlet$outlet) 

stopifnot(nrow(outlet) >= 32)

outlet.full.names <- c(
  `1`  = "ABC",
  `2`  = "BBC",
  `3`  = "Bloomberg",
  `5`  = "CBS",
  `6`  = "CNN",
  `7`  = "The Late Show with Stephen Colbert",
  `8`  = "Daily Kos",
  `9`  = "The Daily Show with Trevor Noah",
  `10` = "The Economist",
  `11` = "Fox News Channel",
  `12` = "Glenn Beck",
  `13` = "The Guardian",
  `14` = "Huffington Post",
  `15` = "Mother Jones",
  `16` = "MSNBC",
  `17` = "NBC",
  `18` = "New Drudge",
  `19` = "The New Yorker",
  `20` = "NPR",
  `21` = "The New York Times",
  `22` = "PBS",
  `24` = "The Rachel Maddow Show",
  `25` = "Rush Limbaugh",
  `26` = "Sean Hannity",
  `28` = "The Blaze",
  `29` = "Think Progress",
  `30` = "USA Today",
  `31` = "The Washington Post",
  `32` = "The Wall Street Journal"
)
outlet$outlet[as.integer(names(outlet.full.names))] <- unname(outlet.full.names)

# Table A1: convert the outlet summary to an xtable object, set the column
# alignment to the one suggested by xalign(), and print the LaTeX table.
outlettab<-xtable(outlet)

align(outlettab) <- xalign(outlettab)

outlettab

# Short outlet labels (used as text labels in the outlet-toxicity figure).
# They start as a copy of the display name and are overwritten by row
# position for the rows listed below; all other rows keep the display name.
# Label fix relative to the previous version: "HuffPostt" -> "HuffPost".
outlet$short<-outlet$outlet

stopifnot(nrow(outlet) >= 32)

outlet.short.names <- c(
  `7`  = "Colbert",
  `8`  = "DailyKos",
  `9`  = "DailyShow",
  `10` = "Economist",
  `11` = "Fox",
  `12` = "GlennBeck",
  `13` = "Guardian",
  `14` = "HuffPost",
  `15` = "MotherJones",
  `16` = "MSNBC",
  `17` = "NBC",
  `18` = "Drudge",
  `19` = "NewYorker",
  `20` = "NPR",
  `21` = "NYT",
  `22` = "PBS",
  `24` = "Maddow",
  `25` = "Limbaugh",
  `26` = "Hannity",
  `28` = "Blaze",
  `29` = "ThinkProg",
  `30` = "USAToday",
  `31` = "WaPo",
  `32` = "WSJ"
)
outlet$short[as.integer(names(outlet.short.names))] <- unname(outlet.short.names)


################# Appendix B #######################################

################# Table B1 : Sample comments by toxicity ###

# NOTE: The actual texts in Table B1 will not be displayed with the following codes since comment texts have been redacted.

# Table B1: two randomly drawn comments per toxicity level. For levels
# 0.1-0.9 comments are drawn from a +/-0.001 window around the level; for the
# top level they are drawn from comments scoring above 0.99. The seed and the
# order of the sample() calls are fixed so the draws are reproducible.
set.seed(20201109)
sample.text <- tibble(
  Toxicity = rep(1:10, each = 2) * 0.1,
  Text = c(
    unlist(lapply(
      seq_len(9),
      function(k) {
        lower <- c(.099, .199, .299, .399, .499, .599, .699, .799, .899)[k]
        upper <- c(.101, .201, .301, .401, .501, .601, .701, .801, .901)[k]
        sample(fb$comment_text[fb$toxic > lower & fb$toxic < upper], 2)
      }
    )),
    sample(fb$comment_text[fb$toxic > .99], 2)
  )
)

print(xtable(sample.text, digits = c(0, 1, 1)), include.rownames = FALSE)


################# Appendix c: Descriptive Statistics#######################################

############################# Table C1  #####################################


## Post-level data rebuilt from all rows of `fb` (feature comments included):
## one row per post_id holding the mean of each listed comment-level column.
post <- summarise(
  group_by(fb, post_id),
  toxic = mean(toxic),
  engagement = mean(engagement_fb),
  comment = mean(comments_count_fb),
  outlet = mean(as.numeric(outlet_id)),
  feature_toxic_max = mean(feature_toxic_max),
  feature_toxic = mean(feature_toxic),
  outlet_bias = mean(outlet_bias)
)

# Table C1: descriptive statistics for the post-level variables. The first
# column of the psych::describe() output is dropped before printing as LaTeX.
fb.p.describe<-psych::describe(select(post,toxic,engagement,comment),fast=TRUE)[,-1]

print(xtable(fb.p.describe, digits = c(0,0,2,2,2,2,2,2)), include.rownames=T)

############################# Table C2  #####################################

# Table C2: the same summary for the comment-level variables in `fb`.
fb.c.describe<-psych::describe(select(fb,toxic,comment_like),fast=TRUE)[,-1]

print(xtable(fb.c.describe, digits = c(0,0,2,2,2,2,2,2)), include.rownames=T)

############################# Table C3  #####################################


# Indicator columns fb3.0, fb3.1 and fb3.2: 1 when fb3 equals that value,
# 0 when it is another value, NA when fb3 is missing.
for (level in c(0, 1, 2)) {
  yg[[paste0("fb3.", level)]] <- ifelse(yg$fb3 == level, 1, 0)
}

# Table C3: descriptive statistics for the YouGov respondent-level variables.
yg.r.describe <- describe(
  as.data.frame(
    select(yg, fb3.0, fb3.1, fb3.2, pol.interest, pk, rep, pidstr, trump, conservative)
  ),
  fast = TRUE,
  trim = NULL
)


print(
  xtable(yg.r.describe[, -1], digits = c(0, 0, 2, 2, 2, 2, 2, 2)),
  include.rownames = TRUE
)


############################# Table C4  #####################################


# Table C4: descriptive statistics for the comment-level YouGov variables
# (toxic, write, meantox) in yg3; the first column of the describe() output is
# dropped before printing as LaTeX.
yg.c.describe<-yg3 %>% 
  select(toxic, write, meantox) %>% 
  as.data.frame() %>% 
  describe(fast=TRUE, trim = NULL)

print(xtable(yg.c.describe[,-1], digits = c(0,0,2,2,2,2,2,2)), include.rownames=T)

############################# Table C5  #####################################

# c0 is the complement of c1 (1 - c1), so both categories appear as rows of
# the table.
anes$c0<-1-anes$c1

# Table C5: descriptive statistics for the ANES variables; the first column of
# the describe() output is dropped before printing as LaTeX.
anes.describe<-anes %>% 
  select(c0,c1,pol.interest,pk,pidstr,nte,rep,trump_therm,clinton_therm,rep_therm,dem_therm,conservative,abortion,aca,imm) %>% 
  as.data.frame() %>% 
  describe(fast=TRUE, trim = NULL)


print(xtable(anes.describe[,-1], digits = c(0,0,2,2,2,2,2,2)), include.rownames=T)

############################# Table C6  #####################################


# Indicator columns comment0 ... comment3: 1 when comment equals that value,
# 0 when it is another value, NA when comment is missing. They are created in
# order so that the range selection comment0:comment3 below picks up all four.
for (level in c(0, 1, 2, 3)) {
  pew[[paste0("comment", level)]] <- ifelse(pew$comment == level, 1, 0)
}

# Table C6: descriptive statistics for the Pew variables.
pew.describe <- describe(
  as.data.frame(
    select(
      pew,
      comment0:comment3, pol.interest, pk, pidstr,
      trump_therm, clinton_therm, rep_therm, dem_therm,
      conservative, abortion, aca, imm
    )
  ),
  fast = TRUE,
  trim = NULL
)

print(
  xtable(pew.describe[, -1], digits = c(0, 0, 2, 2, 2, 2, 2, 2)),
  include.rownames = TRUE
)


################# Appendix D: Additional Results #######################################


########################## Figure D1  ####################################

# Figure D1: density of `toxic` in yg3 by `fbw` (rows with missing `fbw`
# dropped), fixed bandwidth 0.05, with the three example-comment annotations
# and direct labels for the two groups instead of a legend.
# Written to fig4-2-yg-by-willing-to-comment.pdf (5 x 3 in).
ggplot(yg3[!is.na(yg3$fbw),],aes(x=toxic,color=as.factor(fbw),fill=as.factor(fbw),linetype=as.factor(fbw)))+geom_density(bw=.05,alpha=0.2,size=.2)+
  annotate(geom="text",x=.5,y=2.7,label=c("Toxicity = 0.5
                                        \nGOOD LEAVE FOR AFRICA
                                        \nShould of been sterilized"),size=2)+
  annotate(geom="text",x=.74,y=2.7,label=c("Toxicity = 0.75
                                        \nCriminal!!!!
                                        \nDisgusting..."),size=2)+
  annotate(geom="text",x=.94,y=2.7,label=c("Toxicity = 0.99
                                        \nShe's a stupid, worthless b*  
                                        \nRacist ass motherf*****"),size=2)+
  scale_color_manual(name = c(""), values=c("grey2","red3"))+
  scale_fill_manual(name = c(""),values=c("grey2","red3"))+
  scale_linetype_manual(name = c(""),values=c("solid","dashed"))+
  labs(x="Toxicity",y="")+ theme_bw()+
  annotate("curve", x = c(.13,.5), y = c(4.3,1.1), 
           xend = c(.10,.5), yend = c(4.3,.9), color=c("grey2","red3"), linetype=c("solid","dashed"),
           arrow = arrow(angle = 30, length = unit(1, "mm")),size=0.2,alpha=0.7, curvature = .1)+
  annotate(geom="text",x = c(.14,.6), y = c(4.3,1.2), 
           label=c("''Definitely would not comment'' (commenting task in YouGov)",
                   "''Definitely would comment'' (commenting task in YouGov)"),color=c("grey2","red3"),vjust=c(.5,0),hjust=c(0,.5),size=3)+
  xlim(0,1)+ylim(0,5)+theme(panel.grid.major=element_blank(),
                            panel.grid.minor=element_blank(),
                            axis.text = element_text(color = "black"),
                            axis.title = element_text(color = "black"),
                            legend.position="none")
ggsave("fig4-2-yg-by-willing-to-comment.pdf",width=5,height=3)


########################## Figure D2  ####################################


# Figure D2: density of `toxic` by source, restricted to rows of fbyg with
# fb3 == 2 (labelled "frequent commenters only" in the plot), with the
# example-comment annotations and direct labels instead of a legend.
# Written to fig5-fb-vs-yg-robust-commenter.pdf (5 x 3 in).
ggplot(subset(fbyg,fb3==2),aes(x=toxic,color=data,fill=data,linetype=data))+geom_density(bw=.05,alpha=0.2,size=.2)+
  annotate(geom="text",x=.5,y=2.7,label=c("Toxicity = 0.5
                                        \nGOOD LEAVE FOR AFRICA
                                        \nShould of been sterilized"),size=1.5)+
  annotate(geom="text",x=.74,y=2.7,label=c("Toxicity = 0.75
                                        \nCriminal!!!!
                                        \nDisgusting..."),size=1.5)+
  annotate(geom="text",x=.94,y=2.7,label=c("Toxicity = 0.99
                                        \nShe's a stupid, worthless b*  
                                        \nRacist ass motherf*****"),size=1.5)+
  scale_color_manual(name = c(""), values=c("grey2","red3"))+
  scale_fill_manual(name = c(""),values=c("grey2","red3"))+
  scale_linetype_manual(name = c(""),values=c("solid","dashed"))+
  labs(x="Toxicity",y="")+ theme_bw()+
  annotate("curve", x = c(.14,.5), y = c(4.1,1.4), 
           xend = c(.11,.5), yend = c(3.8,.8), color=c("grey2","red3"), linetype=c("solid","dashed"),
           arrow = arrow(angle = 30, length = unit(1, "mm")),size=0.2,alpha=0.7, curvature = .1)+
  annotate(geom="text",x = c(.21,.5), y = c(4.4,1.5), 
           label=c("YouGov: Comments in survey\n(frequent commenters only)",
                   "Facebook: Comments on platform"),
           color=c("black","red3"),vjust=c(.5,0), size = 3)+
  xlim(0,1)+ylim(0,5)+theme(panel.grid.major=element_blank(),
                            panel.grid.minor=element_blank(),
                            axis.text = element_text(color = "black"),
                            axis.title = element_text(color = "black"),
                            legend.position="none")
ggsave("fig5-fb-vs-yg-robust-commenter.pdf",width=5,height=3)

# Number of rows per source in this subset with a non-missing toxicity score.
fbyg%>%filter(fb3==2&!is.na(toxic))%>%select(data)%>%table()


########################## Figure D3  ####################################


# Figure D3: density of `toxic` by source, restricted to rows of fbyg with
# wordcount > 2, with the example-comment annotations and direct labels
# instead of a legend.
# Written to fig5-fb-vs-yg-robust-2-words-dropped.pdf (5 x 3 in).
ggplot(subset(fbyg,wordcount>2),aes(x=toxic,color=data,fill=data,linetype=data))+geom_density(bw=.05,alpha=0.2,size=.2)+
  annotate(geom="text",x=.5,y=2.7,label=c("Toxicity = 0.5
                                        \nGOOD LEAVE FOR AFRICA
                                        \nShould of been sterilized"),size=1.5)+
  annotate(geom="text",x=.74,y=2.7,label=c("Toxicity = 0.75
                                        \nCriminal!!!!
                                        \nDisgusting..."),size=1.5)+
  annotate(geom="text",x=.94,y=2.7,label=c("Toxicity = 0.99
                                        \nShe's a stupid, worthless b*  
                                        \nRacist ass motherf*****"),size=1.5)+
  scale_color_manual(name = c(""), values=c("grey2","red3"))+
  scale_fill_manual(name = c(""),values=c("grey2","red3"))+
  scale_linetype_manual(name = c(""),values=c("solid","dashed"))+
  labs(x="Toxicity",y="")+ theme_bw()+
  #annotate(geom="text",x=0.5,y=4.8,label="Mean Difference = 0.124",size=2.5)+
  annotate("curve", x = c(.14,.5), y = c(4.1,1.4), 
           xend = c(.11,.5), yend = c(3.8,.8), color=c("grey2","red3"), linetype=c("solid","dashed"),
           arrow = arrow(angle = 30, length = unit(1, "mm")),size=0.2,alpha=0.7, curvature = .1)+
  annotate(geom="text",x = c(.21,.5), y = c(4.3,1.6), 
           label=c("YouGov: Comments in survey",
                   "Facebook: Comments on platform"),
           color=c("black","red3"),vjust=c(.5,0), size = 3)+
  xlim(0,1)+ylim(0,5)+theme(panel.grid.major=element_blank(),
                            panel.grid.minor=element_blank(),
                            axis.text = element_text(color = "black"),
                            axis.title = element_text(color = "black"),
                            legend.position="none")
ggsave("fig5-fb-vs-yg-robust-2-words-dropped.pdf",width=5,height=3)



########################## Figure D4  ####################################



# Figures D4 and D5 are seven small panels that differ only in the subset of
# `fbyg` plotted and the output file, so the shared plot is built by one
# helper instead of seven copies of the same ggplot call.
#
# save_toxic_density(df, file)
#   df:   the rows of fbyg to plot (needs columns `toxic` and `data`).
#   file: name of the PDF to write (3 x 3 in).
#   Returns the ggplot object, so a top-level call also displays the panel.
save_toxic_density <- function(df, file) {
  panel <- ggplot(df,aes(x=toxic,color=data,fill=data,linetype=data))+geom_density(bw=.05,alpha=0.2,size=.2)+
    scale_color_manual(name = c(""), values=c("grey2","red3"))+
    scale_fill_manual(name = c(""),values=c("grey2","red3"))+
    scale_linetype_manual(name = c(""),values=c("solid","dashed"))+
    labs(x="Toxicity",y="")+ theme_bw()+
    xlim(0,1)+ylim(0,5)+theme(panel.grid.major=element_blank(),
                              panel.grid.minor=element_blank(),
                              axis.text = element_text(color = "black"),
                              axis.title = element_text(color = "black"),
                              legend.position = "bottom")
  # Pass the plot explicitly rather than relying on last_plot().
  ggsave(file, plot = panel, width = 3, height = 3)
  panel
}

# Figure D4: split by kavanaugh.issue, then by midterm.
save_toxic_density(subset(fbyg,kavanaugh.issue==1), "appendix-kav1.pdf")

save_toxic_density(subset(fbyg,kavanaugh.issue==0), "appendix-kav2.pdf")

table(fbyg$kavanaugh.issue, fbyg$data)


save_toxic_density(subset(fbyg,midterm==1), "appendix-midterm1.pdf")

save_toxic_density(subset(fbyg,midterm==0), "appendix-midterm2.pdf")

table(fbyg$midterm, fbyg$data)

########################## Figure D5  ####################################

# One panel per outlet3 category (0, 1, 2), each followed by the unweighted
# difference in mean toxicity between sources and the number of rows per
# source.
save_toxic_density(subset(fbyg,outlet3==0), "appendix-neutral.pdf")

lm(toxic~data,subset(fbyg,outlet3==0))
filter(fbyg,outlet3==0)$data%>%table()

# Call:
#   lm(formula = toxic ~ data, data = subset(fbyg, outlet3 == 0))
# 
# Coefficients:
#   (Intercept)  dataFacebook  
# 0.1792        0.1265  

save_toxic_density(subset(fbyg,outlet3==1), "appendix-liberal.pdf")

lm(toxic~data,subset(fbyg,outlet3==1))
filter(fbyg,outlet3==1)$data%>%table()

# Call:
#   lm(formula = toxic ~ data, data = subset(fbyg, outlet3 == 1))
# 
# Coefficients:
#   (Intercept)  dataFacebook  
# 0.1857        0.1288  

save_toxic_density(subset(fbyg,outlet3==2), "appendix-conservative.pdf")

lm(toxic~data,subset(fbyg,outlet3==2))
filter(fbyg,outlet3==2)$data%>%table()

# Coefficients:
#   (Intercept)  dataFacebook  
# 0.1885        0.1524  

########################## Figure D6  ####################################



# Figure D6: average toxicity per outlet against outlet_bias. The smoother is
# weighted by n and each outlet is labelled with `short`, sized by n.
ggplot(data = outlet, mapping = aes(x = outlet_bias, y = toxic)) +
  geom_smooth(mapping = aes(weight = n), color = "grey20") +
  ggrepel::geom_text_repel(mapping = aes(label = short, size = n)) +
  # geom_point(aes(size = n, label = short), alpha = 0.2)+
  labs(y = "Average toxicity", x = "Share of Republicans") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(color = "black"),
    axis.text.y = element_text(color = "black"),
    strip.background = element_rect(fill = "white", color = "white", size = 1),
    panel.border = element_rect(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
ggsave(filename = "appendix-outlet-toxicity.pdf", height = 6, width = 6)


################################  Table D1  #####################################



# Table D1: regression table for the models stored in `f2.models`.
# The added rows label each of the ten columns with its model number, data
# source and dependent variable.
stargazer(
  f2.models,
  keep.stat = c("n", "adj.rsq"),
  add.lines = list(
    c("Models", "(1)", "(2)", "(3)", "(4)", "(5)",
      "(6)", "(7)", "(8)", "(9)", "(10)"),
    c("Data", "ANES", "ANES", "ANES", "ANES",
      "Pew", "Pew", "Pew", "YG", "YG", "YG"),
    c("DV", "Interest", "Knowledge", "PID strength", "Need to evaluate",
      "Interest", "Knowledge", "PID strength",
      "Interest", "Knowledge", "PID strength")
  ),
  dep.var.labels.include = FALSE,
  covariate.labels = c(
    "Commented in past year",
    "Hardly ever",
    "Sometimes",
    "Often",
    "A few times a week or less often",
    "About once a day or more often"
  ),
  star.cutoffs = c(0.05, 0.01, 0.005),
  # The argument is `style`; the former spelling `stype` is not a stargazer
  # argument, so the APSR style was never applied.
  style = "apsr"
)




################################  Table D2  #####################################


# Table D2: regression table for elements 1-8 of `f3.models` (titled as the
# ANES estimates behind Figure 3). The added row names each column's DV.
stargazer(
  f3.models[1:8],
  keep.stat = c("n", "adj.rsq"),
  add.lines = list(
    c("DV", "Like Trump", "Dislike Clinton", "Like Reps", "Dislike Dems",
      "Conservative", "Pro life", "Oppose ACA",
      "Anti-immigration")
  ),
  dep.var.labels.include = FALSE,
  covariate.labels = c(
    "Republican",
    "Commented past year",
    "Republican X Commented"
  ),
  star.cutoffs = c(0.05, 0.01, 0.005),
  title = "Regression estimates for Figure 3 (ANES)",
  # The argument is `style`; the former spelling `stype` is not a stargazer
  # argument, so the APSR style was never applied.
  style = "apsr"
)


################################  Table D3  #####################################


# Table D3: regression table for elements 9-16 of `f3.models` (titled as the
# Pew estimates behind Figure 3). The added row names each column's DV.
stargazer(
  f3.models[9:16],
  keep.stat = c("n", "adj.rsq"),
  add.lines = list(
    c("DV", "Like Trump", "Dislike Clinton", "Like Reps", "Dislike Dems",
      "Conservative", "Pro life", "Oppose ACA",
      "Anti-immigration")
  ),
  dep.var.labels.include = FALSE,
  covariate.labels = c(
    "Republican",
    "Comment hardly ever",
    "Comment sometimes",
    "Comment often",
    "Republican X hardly",
    "Republican X sometimes",
    "Republican X often"
  ),
  star.cutoffs = c(0.05, 0.01, 0.005),
  title = "Regression estimates for Figure 3 (Pew)",
  # The argument is `style`; the former spelling `stype` is not a stargazer
  # argument, so the APSR style was never applied.
  style = "apsr"
)

################################  Table D4  #####################################

# Table D4: regression table for elements 17-18 of `f3.models` (titled as the
# YouGov estimates behind Figure 3). The added row names each column's DV.
stargazer(
  f3.models[17:18],
  keep.stat = c("n", "adj.rsq"),
  add.lines = list(
    c("DV", "Support Trump",
      "Conservative")
  ),
  dep.var.labels.include = FALSE,
  covariate.labels = c(
    "Republican",
    "Comment a few times a week or less often on Facebook",
    "Comment about once a day or more often on Facebook",
    "Republican X less often",
    "Republican X more often"
  ),
  star.cutoffs = c(0.05, 0.01, 0.005),
  title = "Regression estimates for Figure 3 (YouGov)",
  # The argument is `style`; the former spelling `stype` is not a stargazer
  # argument, so the APSR style was never applied.
  style = "apsr"
)



########################## Table D5  ####################################

# YG
# Table D5: weighted regressions of `toxic` on yg3, with standard errors
# clustered on caseid. m1/m2 use as.factor(fb3) as the regressor and m3/m4 use
# as.factor(write); the even-numbered models add post_id fixed effects.
m1 <- felm(toxic ~ as.factor(fb3) | 0 | 0 | caseid, yg3, weights = yg3$weight)
m2 <- felm(toxic ~ as.factor(fb3) | post_id | 0 | caseid, yg3, weights = yg3$weight)
m3 <- felm(toxic ~ as.factor(write) | 0 | 0 | caseid, yg3, weights = yg3$weight)
m4 <- felm(toxic ~ as.factor(write) | post_id | 0 | caseid, yg3, weights = yg3$weight)


stargazer(
  m1, m2, m3, m4,
  keep.stat = c("n", "adj.rsq"),
  # One entry per model, matching which of m1-m4 include post_id fixed
  # effects (the third entry previously read "Bo").
  add.lines = list(c("Article fixed effects", "No", "Yes", "No", "Yes")),
  dep.var.labels.include = FALSE,
  covariate.labels = c(
    "Comment a few times a week or less often on Facebook",
    "Comment about once a day or more often on Facebook",
    "Probably would not comment on the presented article",
    "Probably would comment on the presented article",
    "Definitely would comment on the presented article"
  ),
  star.cutoffs = c(0.05, 0.01, 0.005),
  title = "Toxicity of comments by commenters vs. non-commenters (YouGov)",
  # The argument is `style`; the former spelling `stype` is not a stargazer
  # argument, so the APSR style was never applied.
  style = "apsr"
)

########################## Table D6  ####################################


# YG vs FB
# Table D6: weighted regressions of `toxic` on `data` using fbyg, with
# standard errors clustered on caseid. Three samples are used (all rows, rows
# with wordcount > 2, rows with fb3 == 2), each with and without post_id
# fixed effects.

# Build each subsample once so the model data and its weights are guaranteed
# to come from the same rows.
fbyg_wordcount <- subset(fbyg, wordcount > 2)
fbyg_frequent <- subset(fbyg, fb3 == 2)

m1 <- felm(toxic ~ data | 0 | 0 | caseid, fbyg, weights = fbyg$weight)
m2 <- felm(toxic ~ data | post_id | 0 | caseid, fbyg, weights = fbyg$weight)
m3 <- felm(toxic ~ data | 0 | 0 | caseid, fbyg_wordcount,
           weights = fbyg_wordcount$weight)
m4 <- felm(toxic ~ data | post_id | 0 | caseid, fbyg_wordcount,
           weights = fbyg_wordcount$weight)
m5 <- felm(toxic ~ data | 0 | 0 | caseid, fbyg_frequent,
           weights = fbyg_frequent$weight)
m6 <- felm(toxic ~ data | post_id | 0 | caseid, fbyg_frequent,
           weights = fbyg_frequent$weight)

stargazer(
  m1, m2, m3, m4, m5, m6,
  keep.stat = c("n", "adj.rsq"),
  add.lines = list(
    c("Article fixed effects", "No", "Yes", "No", "Yes", "No", "Yes"),
    # NOTE(review): the filter above keeps wordcount > 2, i.e. it drops
    # comments of two words or fewer; confirm this label against the paper.
    c("Less than 2 words dropped", "No", "No", "Yes", "Yes", "No", "No"),
    c("Frequent commenters only (YouGov)", "No", "No", "No", "No", "Yes", "Yes")
  ),
  dep.var.labels.include = FALSE,
  covariate.labels = c("Data = Facebook"),
  star.cutoffs = c(0.05, 0.01, 0.005),
  title = "Comment toxicity on Facebook versus a nationally representative sample",
  # The argument is `style`; the former spelling `stype` is not a stargazer
  # argument, so the APSR style was never applied.
  style = "apsr"
)


########################## Table D7  ####################################

# Table D7: regressions on yg3 of `write` (w1-w5) and `toxic` (t1-t5) on
# cond, meantox and their interaction, with standard errors clustered on
# caseid. Models 4 and 5 of each set add post_id fixed effects.

# DV = write
w1 <- felm(write ~ cond | 0 | 0 | caseid, yg3)
w2 <- felm(write ~ meantox | 0 | 0 | caseid, yg3)
w3 <- felm(write ~ cond * meantox | 0 | 0 | caseid, yg3)
w4 <- felm(write ~ cond | post_id | 0 | caseid, yg3)
w5 <- felm(write ~ cond * meantox | post_id | 0 | caseid, yg3)

# DV = toxic
t1 <- felm(toxic ~ cond | 0 | 0 | caseid, yg3)
t2 <- felm(toxic ~ meantox | 0 | 0 | caseid, yg3)
t3 <- felm(toxic ~ cond * meantox | 0 | 0 | caseid, yg3)
t4 <- felm(toxic ~ cond | post_id | 0 | caseid, yg3)
t5 <- felm(toxic ~ cond * meantox | post_id | 0 | caseid, yg3)


stargazer(
  w1, w2, w3, w4, w5, t1, t2, t3, t4, t5,
  type = "text",
  keep = c("cond1", "meantox", "cond1:meantox", "Constant"),
  keep.stat = c("n", "adj.rsq"),
  model.numbers = FALSE,
  dep.var.labels = c("DV = Willingness to comment",
                     "DV = Toxicity of YG respondent comments"),
  covariate.labels = c(
    "Treatment (top comments shown)",
    "Average toxicity of top comments",
    # The backslash must be doubled: in an R string "\t" is a TAB, so the
    # single-backslash form produced "$<tab>imes$" instead of "$\times$".
    "Treatment $\\times$ top comments toxicity",
    "Constant"
  ),
  add.lines = list(
    c("Article fixed effects",
      "No", "No", "No", "Yes", "Yes", "No", "No", "No", "Yes", "Yes")
  ),
  star.cutoffs = c(0.05, 0.01, 0.005),
  notes = "Standard errors are clustered at the respondent level.",
  column.labels = c("(1)", "(2)", "(3)", "(4)", "(5)",
                    "(6)", "(7)", "(8)", "(9)", "(10)"),
  style = "apsr"
)




######################## Appendix E ################################################


#### FIG VAL
# Correlation matrices of toxic, picked and labelr: all rows first, then the
# rows with data == "fb" and data == "yg" separately.
cor(select(labelr, toxic, picked, labelr)) # r = .79
cor(select(subset(labelr, data == "fb"), toxic, picked, labelr)) # r = .79
cor(select(subset(labelr, data == "yg"), toxic, picked, labelr)) # r = .79

# Constant column "m" added to labelr.
labelr[["m"]] <- "m"


# g1: `picked` against `toxic` for all rows of labelr, with a smoother and
# jittered points. The constant colour key "m" exists only to produce the
# single "Pooled" legend entry.
g1 <- ggplot(data = labelr, mapping = aes(x = toxic, y = picked, color = "m")) +
  geom_smooth(size = 0.2, alpha = 0.2) +
  geom_point(alpha = 0.1, position = "jitter", size = 0.5, shape = 16) +
  scale_color_manual(
    name = c(""),
    breaks = c("m"),
    labels = c("Pooled (r = 0.79)"),
    values = c("grey2")
  ) +
  labs(x = "Perspective API toxicity", y = "Pairwise toxicity rating") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black"),
    strip.background = element_rect(fill = "white", color = "white", size = 1),
    axis.text.x = element_text(color = "black"),
    axis.text.y = element_text(color = "black"),
    legend.position = c(0.80, 0.15),
    legend.text = element_text(size = 6),
    legend.key.size = unit(2, "mm")
  )


# g2: same relationship as g1 but split by `data` ("fb" vs "yg"), with the
# y-axis tick labels hidden. All four scales share the same breaks and legend
# labels so that they merge into a single legend.
val1_breaks <- c("fb", "yg")
val1_labels <- c("FB (r = 0.79)", "YG (r = 0.79)")
val1_colours <- c("red3", "grey2")

g2 <- ggplot(
  data = labelr,
  mapping = aes(
    x = toxic, y = picked,
    color = data, fill = data, shape = data, linetype = data
  )
) +
  geom_smooth(size = 0.2, alpha = 0.2) +
  geom_point(alpha = 0.1, position = "jitter", size = 0.5) +
  scale_color_manual(
    name = c(""), breaks = val1_breaks, labels = val1_labels,
    values = val1_colours
  ) +
  scale_fill_manual(
    name = c(""), breaks = val1_breaks, labels = val1_labels,
    values = val1_colours
  ) +
  scale_shape_manual(
    name = c(""), breaks = val1_breaks, labels = val1_labels,
    values = c(16, 17)
  ) +
  scale_linetype_manual(
    name = c(""), breaks = val1_breaks, labels = val1_labels,
    values = c(2, 1)
  ) +
  labs(x = "Perspective API toxicity", y = "Pairwise toxicity rating") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black"),
    strip.background = element_rect(fill = "white", color = "white", size = 1),
    axis.text.x = element_text(color = "black"),
    # y-axis tick labels are suppressed on this right-hand panel
    axis.text.y = element_blank(),
    legend.position = c(0.80, 0.15),
    legend.text = element_text(size = 6),
    legend.key.size = unit(2, "mm")
  )

# Place the two panels side by side and save the combined figure.
g1 + g2
ggsave(filename = "fig-val1.pdf", height = 3.25, width = 6)


# Correlation between toxic and narrow in val.data, separately for the rows
# with data == "fb" and data == "yg".
cor(select(subset(val.data, data == "fb"), toxic, narrow))
cor(select(subset(val.data, data == "yg"), toxic, narrow))



# g3: smoothed relationship between `narrow` and `toxic` for all rows of
# val.data. The constant colour key "m" exists only to produce the single
# "Pooled" legend entry.
g3 <- ggplot(data = val.data, mapping = aes(x = toxic, y = narrow, color = "m")) +
  geom_smooth(size = 0.2, alpha = 0.2) +
  scale_color_manual(
    name = c(""),
    breaks = c("m"),
    labels = c("Pooled (r = 0.65)"),
    values = c("grey2")
  ) +
  labs(x = "Perspective API toxicity", y = "Pr (Uncivil word dictionary)") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black"),
    strip.background = element_rect(fill = "white", color = "white", size = 1),
    axis.text.x = element_text(color = "black"),
    axis.text.y = element_text(color = "black"),
    legend.position = c(0.80, 0.15),
    legend.text = element_text(size = 6),
    legend.key.size = unit(2, "mm")
  )



# g4: same relationship as g3 but split by `data` ("fb" vs "yg"), with the
# y-axis tick labels hidden. The three scales share breaks and legend labels
# so that they merge into a single legend.
val2_breaks <- c("fb", "yg")
val2_labels <- c("FB (r = 0.65)", "YG (r = 0.69)")
val2_colours <- c("red3", "grey2")

g4 <- ggplot(
  data = val.data,
  mapping = aes(
    x = toxic, y = narrow,
    color = data, fill = data, linetype = data
  )
) +
  geom_smooth(size = 0.2, alpha = 0.2) +
  scale_color_manual(
    name = c(""), breaks = val2_breaks, labels = val2_labels,
    values = val2_colours
  ) +
  scale_fill_manual(
    name = c(""), breaks = val2_breaks, labels = val2_labels,
    values = val2_colours
  ) +
  scale_linetype_manual(
    name = c(""), breaks = val2_breaks, labels = val2_labels,
    values = c(2, 1)
  ) +
  labs(x = "Perspective API toxicity", y = "Pr (Uncivil word dictionary)") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black"),
    strip.background = element_rect(fill = "white", color = "white", size = 1),
    axis.text.x = element_text(color = "black"),
    # y-axis tick labels are suppressed on this right-hand panel
    axis.text.y = element_blank(),
    legend.position = c(0.80, 0.15),
    legend.text = element_text(size = 6),
    legend.key.size = unit(2, "mm")
  )

# Place the two panels side by side and save the combined figure.
g3 + g4
ggsave(filename = "fig-val2.pdf", height = 3.25, width = 6)
